# coding=utf-8

import urllib2
from bs4 import BeautifulSoup


class BIQUGE:
    """Scraper for the novel site rooted at ``http://www.biquge.la/book/``.

    ``take_book`` reads a book's index page (metadata plus chapter links)
    and ``take_page`` reads the body of a single chapter page.
    """

    def __init__(self):
        # Root URL; a book's index page lives at ``<url><book_id>/``.
        self.url = 'http://www.biquge.la/book/'

    def _fetch_soup(self, url):
        """Download ``url`` and return it parsed as a BeautifulSoup document.

        The HTTP response is closed even if reading it fails.
        """
        response = urllib2.urlopen(url)
        try:
            html = response.read()
        finally:
            response.close()
        # NOTE(review): no parser is named here (as in the original code), so
        # bs4 picks one from whatever is installed; pin a parser if results
        # must be identical across machines.
        return BeautifulSoup(html)

    def take_book(self, book_id):
        """Scrape the index page of the book identified by ``book_id``.

        Args:
            book_id: Identifier appended (via ``str``) to the root URL.

        Returns:
            A dict with the keys ``book_id``, ``book_title``, ``author``,
            ``page_urls``, ``category`` and ``pic_url``. ``page_urls`` is a
            list of ``[chapter_title, chapter_url]`` pairs in page order.

        Raises:
            AttributeError: if an expected element (``div.con_top``,
                ``div#fmimg`` with an ``img``, ``div#info`` with ``h1``
                and ``p``) is missing from the page.
            IndexError: if the ``div.con_top`` text contains no ``' > '``
                separator.
        """
        # Python 2 standard library (this module already relies on urllib2).
        from urlparse import urljoin

        url = self.url + str(book_id) + '/'
        soup = self._fetch_soup(url)

        # The category is the second ' > '-separated segment of div.con_top.
        breadcrumb = soup.find('div', attrs={'class': 'con_top'}).getText()
        category = breadcrumb.split(u' > ')[1]

        cover = soup.find('div', attrs={'id': 'fmimg'})
        pic_url = cover.find('img').attrs.get('src')

        info = soup.find('div', attrs={'id': 'info'})
        book_title = info.find('h1').getText()
        # The author is whatever follows the last full-width colon in the
        # first <p> of the info box.
        author = info.find('p').getText().split(u'：')[-1]

        page_urls = []
        for entry in soup.findAll('dd'):
            link = entry.find('a')
            if link is None:
                continue
            href = link.get('href')
            if not href:
                # An anchor without a target cannot be turned into a URL.
                continue
            # urljoin gives the same result as plain concatenation for
            # document-relative hrefs and also resolves absolute ones.
            page_urls.append([link.getText(), urljoin(url, href)])

        return {
            'book_id': book_id,
            'book_title': book_title,
            'author': author,
            'page_urls': page_urls,
            'category': category,
            'pic_url': pic_url,
        }

    def take_page(self, url='http://www.biquge.la/book/392/296165.html'):
        """Return the markup of the ``div#content`` element found at ``url``.

        Args:
            url: Address of the chapter page to download.

        Returns:
            The element (tag included) rendered as a unicode string, or an
            empty unicode string when the page has no ``div#content``.
        """
        content = self._fetch_soup(url).find('div', attrs={'id': 'content'})
        if content is None:
            # Previously this produced the literal text u'None'.
            return u''
        return unicode(content)

if __name__ == '__main__':
    # Running the module directly only builds a scraper. Uncomment the
    # example below (it needs network access) to fetch book 1 and list
    # its chapters.
    scraper = BIQUGE()
    # book = scraper.take_book(1)
    # print book.get('book_title'), book.get('book_id'), book.get('author'), book.get('category'), book.get('pic_url')
    # for title, link in book.get('page_urls'):
    #     print title, link

